[AArch64] Add v1i64 addsat/subsat (#142342)
Merged
davemgreen merged 1 commit into llvm:main on Jun 3, 2025
Merged
Conversation
Add basic handling for v1i64 saddsat, ssubsat, uaddsat and usubsat. We missed that these were not upgrading in llvm#140454 due to a lack of test coverage. Fixes llvm#142323
Member
|
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
Changes
Add basic handling for v1i64 saddsat, ssubsat, uaddsat and usubsat. We missed that these were not upgrading in #140454 due to a lack of test coverage, and for some reason the generic v1i64 nodes were not being treated as legal as they should have been.
Fixes #142323
Full diff: https://github.com/llvm/llvm-project/pull/142342.diff
9 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ae34e6b7dcc3c..854e8891e4e3d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1336,7 +1336,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
// Saturates
- for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
+ for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 6adf84879052f..d5c907988888f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7764,9 +7764,9 @@ multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
}
multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
+ SDPatternOperator OpNode, SDPatternOperator SatOp> {
def v1i64 : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm,
- [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
+ [(set (v1i64 FPR64:$Rd), (SatOp (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
def v1i32 : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm, []>;
def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
def v1i8 : BaseSIMDThreeScalar<U, 0b001, opc, FPR8 , asm, []>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 96d0146c1e752..1bd77c9d80333 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6336,19 +6336,19 @@ defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONandIsStreamingSafe>;
defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONandIsStreamingSafe>;
defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStreamingSafe>;
-defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
+defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd, saddsat>;
defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
-defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
-defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
-defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
+defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", int_aarch64_neon_sqrshl, int_aarch64_neon_sqrshl>;
+defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl, int_aarch64_neon_sqshl>;
+defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub, ssubsat>;
defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>;
-defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
-defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
-defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
-defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
+defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd, uaddsat>;
+defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", int_aarch64_neon_uqrshl, int_aarch64_neon_uqrshl>;
+defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl, int_aarch64_neon_uqshl>;
+defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub, usubsat>;
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
let Predicates = [HasRDM] in {
diff --git a/llvm/test/CodeGen/AArch64/arm64-vqadd.ll b/llvm/test/CodeGen/AArch64/arm64-vqadd.ll
index fa515fe352d8f..ff1fedad43393 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vqadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vqadd.ll
@@ -45,13 +45,7 @@ define <1 x i64> @sqadd1d(ptr %A, ptr %B) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: fmov x8, d1
-; CHECK-NEXT: fmov x9, d0
-; CHECK-NEXT: adds x8, x9, x8
-; CHECK-NEXT: asr x9, x8, #63
-; CHECK-NEXT: eor x9, x9, #0x8000000000000000
-; CHECK-NEXT: csel x8, x9, x8, vs
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: sqadd d0, d0, d1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp2 = load <1 x i64>, ptr %B
@@ -104,11 +98,7 @@ define <1 x i64> @uqadd1d(ptr %A, ptr %B) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: fmov x8, d1
-; CHECK-NEXT: fmov x9, d0
-; CHECK-NEXT: adds x8, x9, x8
-; CHECK-NEXT: csinv x8, x8, xzr, lo
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: uqadd d0, d0, d1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp2 = load <1 x i64>, ptr %B
diff --git a/llvm/test/CodeGen/AArch64/arm64-vqsub.ll b/llvm/test/CodeGen/AArch64/arm64-vqsub.ll
index ffcb7d668d637..b8168eba8cebb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vqsub.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vqsub.ll
@@ -45,13 +45,7 @@ define <1 x i64> @sqsub1d(ptr %A, ptr %B) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: fmov x8, d1
-; CHECK-NEXT: fmov x9, d0
-; CHECK-NEXT: subs x8, x9, x8
-; CHECK-NEXT: asr x9, x8, #63
-; CHECK-NEXT: eor x9, x9, #0x8000000000000000
-; CHECK-NEXT: csel x8, x9, x8, vs
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: sqsub d0, d0, d1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp2 = load <1 x i64>, ptr %B
@@ -104,11 +98,7 @@ define <1 x i64> @uqsub1d(ptr %A, ptr %B) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: fmov x8, d1
-; CHECK-NEXT: fmov x9, d0
-; CHECK-NEXT: subs x8, x9, x8
-; CHECK-NEXT: csel x8, xzr, x8, lo
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: uqsub d0, d0, d1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp2 = load <1 x i64>, ptr %B
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
index 5f0d4c7bffe53..1c4a504d0ab70 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -447,13 +447,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind {
; CHECK-SD-LABEL: v1i64:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr x8, [x1]
-; CHECK-SD-NEXT: ldr x9, [x0]
-; CHECK-SD-NEXT: adds x8, x9, x8
-; CHECK-SD-NEXT: asr x9, x8, #63
-; CHECK-SD-NEXT: eor x9, x9, #0x8000000000000000
-; CHECK-SD-NEXT: csel x8, x9, x8, vs
-; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ldr d0, [x0]
+; CHECK-SD-NEXT: ldr d1, [x1]
+; CHECK-SD-NEXT: sqadd d0, d0, d1
; CHECK-SD-NEXT: str d0, [x2]
; CHECK-SD-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
index ed79d0158651a..3af858713525b 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -449,13 +449,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind {
; CHECK-SD-LABEL: v1i64:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr x8, [x1]
-; CHECK-SD-NEXT: ldr x9, [x0]
-; CHECK-SD-NEXT: subs x8, x9, x8
-; CHECK-SD-NEXT: asr x9, x8, #63
-; CHECK-SD-NEXT: eor x9, x9, #0x8000000000000000
-; CHECK-SD-NEXT: csel x8, x9, x8, vs
-; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ldr d0, [x0]
+; CHECK-SD-NEXT: ldr d1, [x1]
+; CHECK-SD-NEXT: sqsub d0, d0, d1
; CHECK-SD-NEXT: str d0, [x2]
; CHECK-SD-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index dcfb5176db12d..3cfb24aaccb11 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -439,11 +439,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind {
; CHECK-SD-LABEL: v1i64:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr x8, [x1]
-; CHECK-SD-NEXT: ldr x9, [x0]
-; CHECK-SD-NEXT: adds x8, x9, x8
-; CHECK-SD-NEXT: csinv x8, x8, xzr, lo
-; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ldr d0, [x0]
+; CHECK-SD-NEXT: ldr d1, [x1]
+; CHECK-SD-NEXT: uqadd d0, d0, d1
; CHECK-SD-NEXT: str d0, [x2]
; CHECK-SD-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
index 0049aba62d27f..a71cf95a728db 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -436,11 +436,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind {
; CHECK-SD-LABEL: v1i64:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr x8, [x1]
-; CHECK-SD-NEXT: ldr x9, [x0]
-; CHECK-SD-NEXT: subs x8, x9, x8
-; CHECK-SD-NEXT: csel x8, xzr, x8, lo
-; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: ldr d0, [x0]
+; CHECK-SD-NEXT: ldr d1, [x1]
+; CHECK-SD-NEXT: uqsub d0, d0, d1
; CHECK-SD-NEXT: str d0, [x2]
; CHECK-SD-NEXT: ret
;
|
You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 854e8891e..3fce928a0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1336,8 +1336,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
// Saturates
- for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64,
- MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+ for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64, MVT::v16i8,
+ MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Add basic handling for v1i64 saddsat, ssubsat, uaddsat and usubsat. We missed that these were not upgrading in #140454 due to a lack of test coverage, and for some reason the generic v1i64 nodes were not being treated as legal as they should have been.
Fixes #142323